In [1]:
import numpy as np
import pandas as pd
import copy
import geopandas as gpd
import plotly.graph_objs as go
import plotly as py
from plotly.offline import init_notebook_mode, iplot
import matplotlib.pyplot as plt
In [2]:
states = pd.read_excel("./messy_data/states.xlsx")
states.head()
Out[2]:
State Abbreviation
0 Alabama AL
1 Alaska AK
2 Arizona AZ
3 Arkansas AR
4 California CA
In [3]:
states.drop(states.index[states.Abbreviation == "DC"], inplace=True)
states.columns = ["Jurisdiction", "Jurisdiction Abbreviation"]
states = states.append({"Jurisdiction" : "Federal", "Jurisdiction Abbreviation" : "FED"}, ignore_index=True)
states.tail()
Out[3]:
Jurisdiction Jurisdiction Abbreviation
47 West Virginia WV
48 Wisconsin WI
49 Wyoming WY
50 Puerto Rico PR
51 Federal FED
In [4]:
malePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Male", header=9, nrows=54).dropna(1, "all").dropna(0)
malePrisonerPopulation.head()
Out[4]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 27975.0 24835.0 22964.0 26574.0 28046.0 30172.0 32267.0 37819.0 41575.0 ... 186280 188007 194493 196222 202462 203766 201697 196568 183502 176495
3 Alabama 5368.0 5221.0 6278.0 7339.0 8757.0 9375.0 9961.0 10453.0 11094.0 ... 27254 28277 29419 29261 29696 29782 29660 29182 28220 26506
4 Alaska 678.0 729.0 801.0 977.0 1276.0 1557.0 1888.0 2207.0 2344.0 ... 4603 4511 4696 4782 4932 4934 4450 5091 4761 4024
5 Arizona 3275.0 3573.0 4153.0 4986.0 5788.0 6566.0 7482.0 8134.0 8948.0 ... 34286 35739 36768 36521 36470 36447 37402 38295 38738 38323
6 Arkansas 2553.0 2927.0 2805.0 3197.0 3755.0 4050.0 4286.0 4394.0 4477.0 ... 13248 13656 14147 15040 14995 13594 15904 16476 16305 16161

5 rows × 40 columns

In [5]:
femalePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Female", header=9, nrows=54).dropna(1, "all").dropna(0)
femalePrisonerPopulation.head()
Out[5]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 1828.0 1536.0 1399.0 1559.0 1627.0 1754.0 1996.0 2404.0 2833.0 ... 13338 13273 13625 13549 13900 14049 14169 13999 12953 12697
3 Alabama 257.0 243.0 265.0 318.0 476.0 481.0 521.0 562.0 616.0 ... 2158 2231 2455 2503 2574 2649 2721 2589 2590 2377
4 Alaska 34.0 31.0 21.0 47.0 46.0 74.0 79.0 122.0 116.0 ... 564 503 589 609 665 699 631 703 577 410
5 Arizona 181.0 176.0 219.0 237.0 281.0 323.0 363.0 397.0 486.0 ... 3460 3763 3776 3688 3550 3633 3775 3964 3981 3997
6 Arkansas 101.0 115.0 106.0 131.0 167.0 196.0 196.0 217.0 224.0 ... 1066 1060 1061 1164 1113 1060 1331 1398 1402 1376

5 rows × 40 columns

In [6]:
totalPrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Total", header=9, nrows=54).dropna(1, "all").dropna(0)
totalPrisonerPopulation.head()
Out[6]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 29803.0 26371.0 24363.0 28133.0 29673.0 31926.0 34263.0 40223.0 44408.0 ... 199618 201280 208118 209771 216362 217815 215866 210567 196455 189192
3 Alabama 5625.0 5464.0 6543.0 7657.0 9233.0 9856.0 10482.0 11015.0 11710.0 ... 29412 30508 31874 31764 32270 32431 32381 31771 30810 28883
4 Alaska 712.0 760.0 822.0 1024.0 1322.0 1631.0 1967.0 2329.0 2460.0 ... 5167 5014 5285 5391 5597 5633 5081 5794 5338 4434
5 Arizona 3456.0 3749.0 4372.0 5223.0 6069.0 6889.0 7845.0 8531.0 9434.0 ... 37746 39502 40544 40209 40020 40080 41177 42259 42719 42320
6 Arkansas 2654.0 3042.0 2911.0 3328.0 3922.0 4246.0 4482.0 4611.0 4701.0 ... 14314 14716 15208 16204 16108 14654 17235 17874 17707 17537

5 rows × 40 columns

In [7]:
def cleanAndMeltPopulationTable(table, states):
    
    # add DC data to Federal, and delete it
    table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
    table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)

    table = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = "Population")
    table.Year = table.Year.astype(int)
    
    table = states.merge(table, on="Jurisdiction")
    
    return table
In [8]:
meltedMalePrisonerPopulation = cleanAndMeltPopulationTable(malePrisonerPopulation, states)
meltedFemalePrisonerPopulation = cleanAndMeltPopulationTable(femalePrisonerPopulation, states)
meltedTotalPrisonerPopulation = cleanAndMeltPopulationTable(totalPrisonerPopulation, states)

meltedTotalPrisonerPopulation.head()
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

Out[8]:
Jurisdiction Jurisdiction Abbreviation Year Population
0 Alabama AL 1978 5625
1 Alabama AL 1979 5464
2 Alabama AL 1980 6543
3 Alabama AL 1981 7657
4 Alabama AL 1982 9233
In [9]:
regions = pd.read_excel("./messy_data/state_region.xlsx")
regions.head()
Out[9]:
State Division Region
0 Connecticut New England Northeast
1 Maine New England Northeast
2 Massachusetts New England Northeast
3 New Hampshire New England Northeast
4 Rhode Island New England Northeast
In [10]:
regions.drop(regions.index[regions.State == "District of Columbia"], inplace=True)
In [11]:
def aggregateRegionalPopulationSum(table, regions):
    
    table = regions.merge(table, left_on="State", right_on="Jurisdiction", how="right")
    temp_index = table["Jurisdiction"] == "Federal"
    table.loc[temp_index, "Region"] = "Federal"
    table.loc[temp_index, "Division"] = "Federal"
    
    regionSum = table.groupby(["Year", "Region"]).Population.sum()
    divisionSum = table.groupby(["Year", "Region", "Division"]).Population.sum()
    
    return regionSum, divisionSum

    
In [12]:
malePrisonerPopulationRegionSum, malePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedMalePrisonerPopulation, regions)
femalePrisonerPopulationRegionSum, femalePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedFemalePrisonerPopulation, regions)
totalPrisonerPopulationRegionSum, totalPrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedTotalPrisonerPopulation, regions)
In [13]:
def plotAnnualStatePopulation(table, gender):
    
    table = table[table.Jurisdiction != "Federal"]

    cmax = table.Population.max()
    cmin = table.Population.min()
    
    years = table.Year.unique()

    heatMapData = [{"type" : "choropleth",  "locations" : table.loc[table.Year == year, "Jurisdiction Abbreviation"], "locationmode" : "USA-states", "colorscale" : "Viridis", "zmin" : cmin, "zmax" : cmax, "z" : table.Population[table.Year == year].astype(float)} for year in years]
    mapLayout = [{"geo" : {"scope" : 'usa'}, "title" : gender + " Prisoner Population by State, " + str(year)} for year in years]
    
    for i, data in enumerate(heatMapData):
        usHeatMap = go.Figure(data=[data], layout=mapLayout[i])
        iplot(usHeatMap)
In [14]:
# gender = "Total"

# cmax = table.Population.max()
# cmin = table.Population.min()

# heatMapData = [dict(type = "choropleth", locations=table.loc[table.Year == year, "Jurisdiction Abbreviation"], locationmode="USA-states", colorscale="Viridis", zmin=cmin, zmax=cmax, z=table.Population[table.Year == year].astype(float), text = str(year), geo = 'geo'+str(i+1) if i != 0 else 'geo') for i, year in enumerate(years)]
# mapLayout = {"geo" + (str(i + 1) if i != 0 else "" ) : {"scope" : 'usa', "domain" : dict(x=[0, 1], y=[i / len(years), (i + 1) / len(years)])} for i in range(len(years))}
# fig = {'data' : heatMapData, 'layout' : mapLayout}

# iplot(fig)
In [15]:
init_notebook_mode()
In [16]:
plotAnnualStatePopulation(meltedMalePrisonerPopulation, "Male")
In [17]:
plotAnnualStatePopulation(meltedFemalePrisonerPopulation, "Female")
In [18]:
plotAnnualStatePopulation(meltedTotalPrisonerPopulation, "Total")
In [19]:
usMap = gpd.read_file("./us_states_map.json")
usMap
Out[19]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3...
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33....
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ...
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4...
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000...
5 0400000US11 11 District of Columbia 61.048 POLYGON ((-77.03298599999999 38.8395, -77.0316...
6 0400000US13 13 Georgia 57513.485 POLYGON ((-84.81047700000001 34.987607, -84.80...
7 0400000US15 15 Hawaii 6422.628 (POLYGON ((-155.778234 20.245743, -155.772734 ...
8 0400000US17 17 Illinois 55518.930 POLYGON ((-89.36603100000001 42.500274, -89.36...
9 0400000US18 18 Indiana 35826.109 POLYGON ((-84.804119 40.352757, -84.803917 40....
10 0400000US22 22 Louisiana 43203.905 (POLYGON ((-88.865067 29.752714, -88.889754999...
11 0400000US27 27 Minnesota 79626.743 POLYGON ((-92.191501 46.672586, -92.1971450000...
12 0400000US28 28 Mississippi 46923.274 (POLYGON ((-89.095623 30.231767, -89.077259 30...
13 0400000US30 30 Montana 145545.801 POLYGON ((-111.044275 45.001345, -111.056207 4...
14 0400000US35 35 New Mexico 121298.148 POLYGON ((-105.998003 32.002328, -106.09976 32...
15 0400000US38 38 North Dakota 69000.798 POLYGON ((-100.511949 45.943654, -100.627681 4...
16 0400000US40 40 Oklahoma 68594.921 POLYGON ((-100.000381 34.746358, -100.000381 3...
17 0400000US42 42 Pennsylvania 44742.703 POLYGON ((-79.476662 39.721078, -79.608223 39....
18 0400000US47 47 Tennessee 41234.896 POLYGON ((-83.47210800000001 36.597284, -83.27...
19 0400000US51 51 Virginia 39490.086 (POLYGON ((-75.242266 38.027209, -75.296871 37...
20 0400000US72 72 Puerto Rico 3423.775 (POLYGON ((-65.280764 18.288274, -65.283269 18...
21 0400000US10 10 Delaware 1948.543 (POLYGON ((-75.564927 39.583248, -75.576271000...
22 0400000US54 54 West Virginia 24038.210 POLYGON ((-78.571901 39.031995, -78.565837 39....
23 0400000US55 55 Wisconsin 54157.805 (POLYGON ((-90.456677 47.016737, -90.455299999...
24 0400000US56 56 Wyoming 97093.141 POLYGON ((-104.055077 43.936535, -104.055104 4...
25 0400000US01 01 Alabama 50645.326 (POLYGON ((-88.124658 30.28364, -88.0868119999...
26 0400000US02 02 Alaska 570640.950 (POLYGON ((-162.255031 54.978353, -162.249682 ...
27 0400000US12 12 Florida 53624.759 (POLYGON ((-80.250581 25.34193, -80.2549159999...
28 0400000US16 16 Idaho 82643.117 POLYGON ((-111.048974 44.474072, -111.049194 4...
29 0400000US20 20 Kansas 81758.717 POLYGON ((-99.541116 36.999573, -99.5580680000...
30 0400000US24 24 Maryland 9707.241 (POLYGON ((-76.048373 38.12055, -76.056811 38....
31 0400000US34 34 New Jersey 7354.220 POLYGON ((-74.90023600000001 40.077149, -74.83...
32 0400000US37 37 North Carolina 48617.905 (POLYGON ((-75.753765 35.199612, -75.74522 35....
33 0400000US45 45 South Carolina 30060.696 POLYGON ((-82.216217 35.196044, -82.195483 35....
34 0400000US53 53 Washington 66455.521 (POLYGON ((-122.397349 47.912401, -122.419274 ...
35 0400000US50 50 Vermont 9216.657 POLYGON ((-72.458519 42.726853, -72.86418 42.7...
36 0400000US49 49 Utah 82169.620 POLYGON ((-111.046689 42.001567, -111.046402 4...
37 0400000US19 19 Iowa 55857.130 POLYGON ((-91.16306400000001 42.986781, -91.14...
38 0400000US21 21 Kentucky 39486.338 (POLYGON ((-89.5391 36.498201, -89.560344 36.5...
39 0400000US23 23 Maine 30842.923 (POLYGON ((-69.307908 43.773767, -69.306751000...
40 0400000US25 25 Massachusetts 7800.058 (POLYGON ((-70.821001 41.587268, -70.821743 41...
41 0400000US26 26 Michigan 56538.901 (POLYGON ((-85.566441 45.760222, -85.54956 45....
42 0400000US29 29 Missouri 68741.522 POLYGON ((-89.545006 36.336809, -89.560439 36....
43 0400000US31 31 Nebraska 76824.171 POLYGON ((-104.05283 41.697954, -104.052774 41...
44 0400000US32 32 Nevada 109781.180 POLYGON ((-114.046555 40.116931, -114.047134 3...
45 0400000US33 33 New Hampshire 8952.651 POLYGON ((-70.81954899999999 43.123231, -70.78...
46 0400000US36 36 New York 47126.399 (POLYGON ((-73.77336099999999 40.859449, -73.7...
47 0400000US39 39 Ohio 40860.694 (POLYGON ((-82.700208 41.61219, -82.691123 41....
48 0400000US41 41 Oregon 95988.013 POLYGON ((-121.908267 45.654399, -121.900858 4...
49 0400000US44 44 Rhode Island 1033.814 (POLYGON ((-71.38358599999999 41.464782, -71.3...
50 0400000US46 46 South Dakota 75811.000 POLYGON ((-104.055104 43.853478, -104.055077 4...
51 0400000US48 48 Texas 261231.711 (POLYGON ((-96.83002999999999 28.111842, -96.8...
In [20]:
usMap = usMap.loc[list(map(lambda x: x not in ["Puerto Rico", "District of Columbia"], usMap.NAME))]
usMap
Out[20]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3...
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33....
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ...
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4...
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000...
6 0400000US13 13 Georgia 57513.485 POLYGON ((-84.81047700000001 34.987607, -84.80...
7 0400000US15 15 Hawaii 6422.628 (POLYGON ((-155.778234 20.245743, -155.772734 ...
8 0400000US17 17 Illinois 55518.930 POLYGON ((-89.36603100000001 42.500274, -89.36...
9 0400000US18 18 Indiana 35826.109 POLYGON ((-84.804119 40.352757, -84.803917 40....
10 0400000US22 22 Louisiana 43203.905 (POLYGON ((-88.865067 29.752714, -88.889754999...
11 0400000US27 27 Minnesota 79626.743 POLYGON ((-92.191501 46.672586, -92.1971450000...
12 0400000US28 28 Mississippi 46923.274 (POLYGON ((-89.095623 30.231767, -89.077259 30...
13 0400000US30 30 Montana 145545.801 POLYGON ((-111.044275 45.001345, -111.056207 4...
14 0400000US35 35 New Mexico 121298.148 POLYGON ((-105.998003 32.002328, -106.09976 32...
15 0400000US38 38 North Dakota 69000.798 POLYGON ((-100.511949 45.943654, -100.627681 4...
16 0400000US40 40 Oklahoma 68594.921 POLYGON ((-100.000381 34.746358, -100.000381 3...
17 0400000US42 42 Pennsylvania 44742.703 POLYGON ((-79.476662 39.721078, -79.608223 39....
18 0400000US47 47 Tennessee 41234.896 POLYGON ((-83.47210800000001 36.597284, -83.27...
19 0400000US51 51 Virginia 39490.086 (POLYGON ((-75.242266 38.027209, -75.296871 37...
21 0400000US10 10 Delaware 1948.543 (POLYGON ((-75.564927 39.583248, -75.576271000...
22 0400000US54 54 West Virginia 24038.210 POLYGON ((-78.571901 39.031995, -78.565837 39....
23 0400000US55 55 Wisconsin 54157.805 (POLYGON ((-90.456677 47.016737, -90.455299999...
24 0400000US56 56 Wyoming 97093.141 POLYGON ((-104.055077 43.936535, -104.055104 4...
25 0400000US01 01 Alabama 50645.326 (POLYGON ((-88.124658 30.28364, -88.0868119999...
26 0400000US02 02 Alaska 570640.950 (POLYGON ((-162.255031 54.978353, -162.249682 ...
27 0400000US12 12 Florida 53624.759 (POLYGON ((-80.250581 25.34193, -80.2549159999...
28 0400000US16 16 Idaho 82643.117 POLYGON ((-111.048974 44.474072, -111.049194 4...
29 0400000US20 20 Kansas 81758.717 POLYGON ((-99.541116 36.999573, -99.5580680000...
30 0400000US24 24 Maryland 9707.241 (POLYGON ((-76.048373 38.12055, -76.056811 38....
31 0400000US34 34 New Jersey 7354.220 POLYGON ((-74.90023600000001 40.077149, -74.83...
32 0400000US37 37 North Carolina 48617.905 (POLYGON ((-75.753765 35.199612, -75.74522 35....
33 0400000US45 45 South Carolina 30060.696 POLYGON ((-82.216217 35.196044, -82.195483 35....
34 0400000US53 53 Washington 66455.521 (POLYGON ((-122.397349 47.912401, -122.419274 ...
35 0400000US50 50 Vermont 9216.657 POLYGON ((-72.458519 42.726853, -72.86418 42.7...
36 0400000US49 49 Utah 82169.620 POLYGON ((-111.046689 42.001567, -111.046402 4...
37 0400000US19 19 Iowa 55857.130 POLYGON ((-91.16306400000001 42.986781, -91.14...
38 0400000US21 21 Kentucky 39486.338 (POLYGON ((-89.5391 36.498201, -89.560344 36.5...
39 0400000US23 23 Maine 30842.923 (POLYGON ((-69.307908 43.773767, -69.306751000...
40 0400000US25 25 Massachusetts 7800.058 (POLYGON ((-70.821001 41.587268, -70.821743 41...
41 0400000US26 26 Michigan 56538.901 (POLYGON ((-85.566441 45.760222, -85.54956 45....
42 0400000US29 29 Missouri 68741.522 POLYGON ((-89.545006 36.336809, -89.560439 36....
43 0400000US31 31 Nebraska 76824.171 POLYGON ((-104.05283 41.697954, -104.052774 41...
44 0400000US32 32 Nevada 109781.180 POLYGON ((-114.046555 40.116931, -114.047134 3...
45 0400000US33 33 New Hampshire 8952.651 POLYGON ((-70.81954899999999 43.123231, -70.78...
46 0400000US36 36 New York 47126.399 (POLYGON ((-73.77336099999999 40.859449, -73.7...
47 0400000US39 39 Ohio 40860.694 (POLYGON ((-82.700208 41.61219, -82.691123 41....
48 0400000US41 41 Oregon 95988.013 POLYGON ((-121.908267 45.654399, -121.900858 4...
49 0400000US44 44 Rhode Island 1033.814 (POLYGON ((-71.38358599999999 41.464782, -71.3...
50 0400000US46 46 South Dakota 75811.000 POLYGON ((-104.055104 43.853478, -104.055077 4...
51 0400000US48 48 Texas 261231.711 (POLYGON ((-96.83002999999999 28.111842, -96.8...
In [21]:
def plotGeoPandasUSMap(usMap, title=None, dataColumn=None, dataLimit=(None, None)):
    
    fig, mapAx = plt.subplots(1, 1)
    
    mapLimitW, mapLimitE, mapLimitS, mapLimitN = -185, -65, 15, 75
    mapAx.axis((mapLimitW, mapLimitE, mapLimitS, mapLimitN))
    mapAx.axis('off')
    mapAx.set_aspect('equal', 'box')
    mapAx.set_title(title)
    
    usMap.plot(column=dataColumn, figsize=(20, 20), edgecolor="k", ax=mapAx, legend=True, vmin=dataLimit[0], vmax=dataLimit[1])
    
#     sm = plt.cm.ScalarMappable(cmap='viridis_r', norm=plt.Normalize(vmin=dataLimit[0], vmax=dataLimit[1]))
#     sm._A = []
#     cbar = mapAx.colorbar(sm)
In [22]:
usMap = usMap.merge(states, left_on="NAME", right_on = "Jurisdiction", how="left").drop("Jurisdiction", axis=1)
usMap.columns = ["GEO_ID", "STATE", "NAME", "LSAD", "CENSUSAREA", "geometry", "Abbreviatio"]
usMap.head()
Out[22]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry Abbreviatio
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3... AZ
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33.... AR
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ... CA
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4... CO
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000... CT
In [23]:
usMap = usMap.merge(regions, left_on="NAME", right_on = "State", how="left").drop("State", axis=1)
usMap.head()
Out[23]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry Abbreviatio Division Region
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3... AZ Mountain West
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33.... AR West South Central South
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ... CA Pacific West
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4... CO Mountain West
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000... CT New England Northeast
In [24]:
def plotRegionalPopulation(table, usMap, gender):
    
    regionType = table.index.names[1]

    tableWOFederal = table[table.index.get_level_values(1) != "Federal"]

    years = table.index.get_level_values(0).unique()
    dividedTableWOFederal = [tableWOFederal[year] for year in years]

    usRegionMap = usMap[[regionType, "geometry"]].dissolve(regionType)

    colorlimit = (tableWOFederal.min(), tableWOFederal.max())
    for i, oneYearTable in enumerate(dividedTableWOFederal):
        year = years[i]

        oneYearTable = usRegionMap.join(oneYearTable)

        figtitle = gender + " Prisoner Population by " + regionType + ", " + str(year)
        plotGeoPandasUSMap(oneYearTable, figtitle, "Population", colorlimit)


    figtitle = gender + " Prisoner Population by " + regionType
    ax = table.unstack().plot(title = figtitle)
    ax.set_ylim(bottom=0)
    ax.tick_params(axis = "y", length=0)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=len(table.index.get_level_values(1).unique()))
In [25]:
plotRegionalPopulation(malePrisonerPopulationRegionSum, usMap, "Male")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/pandas/plotting/_core.py:304: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [26]:
plotRegionalPopulation(femalePrisonerPopulationRegionSum, usMap, "Female")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [27]:
plotRegionalPopulation(totalPrisonerPopulationRegionSum, usMap, "Total")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [28]:
def dropRegionIndex(table):
    table = copy.deepcopy(table)
    
    table.index = table.index.droplevel(1)
    return table


plotRegionalPopulation(dropRegionIndex(malePrisonerPopulationDivisionSum), usMap, "Male")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [29]:
plotRegionalPopulation(dropRegionIndex(femalePrisonerPopulationDivisionSum), usMap, "Female")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [30]:
plotRegionalPopulation(dropRegionIndex(totalPrisonerPopulationDivisionSum), usMap, "Total")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [31]:
custodyPopulation_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Custody Population", na_values="/")
occupancyLowRate_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Highest Capacity", na_values="/")
occupancyHighRate_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Lowest Capacity", na_values="/")
In [32]:
custodyPopulation_11_16
Out[32]:
Jurisdiction 2011 2012 2013 2014 2015 2016
0 Federal 176228 176658 174242.0 169840 160946 154339
1 Alabama 26268 26230 26271.0 25664 24814 23397
2 Alaska 3708 4575 5054.0 5188 5247 4378
3 Arizona 33492 33578 34626.0 35181 35733 42248
4 Arkansas 14090 14043 14295.0 15250 15784 15833
5 California 138274 123090 122798.0 119071 116569 117557
6 Colorado 17559 16389 16286.0 16687 15972 15922
7 Connecticut 17022 16347 16594.0 16167 15500 14532
8 Delaware 6546 6730 6798.0 6730 6437 6334
9 Florida 100861 99835 100940.0 100873 99485 85834
10 Georgia 52844 55178 53701.0 52719 52002 53433
11 Hawaii 3687 3661 3752.0 3965 4073 3777
12 Idaho 7470 7715 7219.0 7497 7238 7221
13 Illinois 48427 49348 48653.0 48278 46240 43616
14 Indiana 24450 23783 28495.0 28073 26586 25143
15 Iowa 9115 8735 8106.0 8209 8230 8378
16 Kansas 9256 9422 9515.0 9539 9533 9653
17 Kentucky 11951 12186 12141.0 12114 11959 11867
18 Louisiana 18844 18601 18794.0 18710 18447 17932
19 Maine 1978 1977 2073.0 2199 2190 2356
20 Maryland 22923 21783 21676.0 21236 20921 20211
21 Massachusetts 11467 11127 10622.0 10447 9493 9038
22 Michigan 42904 43594 43704.0 43359 42628 41122
23 Minnesota 9309 9421 9391.0 9576 9578 9509
24 Mississippi 15390 15791 15591.0 13069 13967 13720
25 Missouri 30969 31205 31499.0 31903 32295 32427
26 Montana 1707 1677 1666.0 1687 1686 1718
27 Nebraska 4657 4721 5012.0 5228 5133 5167
28 Nevada 12159 12594 NaN 12693 13235 13932
29 New Hampshire 2423 2568 2848.0 2723 2661 2599
30 New Jersey 20755 20333 19528.0 18633 17431 16738
31 New Mexico 3834 3641 3783.0 3876 4078 3956
32 New York 55196 54058 53312.0 52362 51485 50611
33 North Carolina 39662 37378 37176.0 37348 36888 35970
34 North Dakota 1385 1413 1571.0 1325 1345 1379
35 Ohio 47957 45529 46224.0 46151 46190 45913
36 Oklahoma 17724 17947 18313.0 19126 19875 19218
37 Oregon 13728 14123 14605.0 14492 14655 14579
38 Pennsylvania 48515 49009 49735.0 48538 48241 48287
39 Rhode Island 3032 3042 3168.0 3133 2982 2887
40 South Carolina 22343 21867 21534.0 20948 20457 20376
41 South Dakota 3551 3582 3596.0 3497 3514 3770
42 Tennessee 14684 14652 15655.0 15699 14628 14106
43 Texas 141353 136578 140839.0 139879 138199 137584
44 Utah 5294 5309 5382.0 5307 4831 4502
45 Vermont 1531 1530 1579.0 1548 1509 1471
46 Virginia 28962 28149 28431.0 28480 30430 29882
47 Washington 17109 16919 17760.0 17180 17222 17228
48 West Virginia 5149 5335 5708.0 5867 5925 5899
49 Wisconsin 22381 22401 22443.0 22572 22914 23163
50 Wyoming 1917 1951 2036.0 2114 2133 2083
In [33]:
def cleanAndMeltCustodyOccupancyTable(table, states, tableType):
    
    table = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = tableType)
    if tableType == "Occupancy":
        table.Occupancy = table.Occupancy / 100
    
    table.Year = table.Year.astype(int)
    table = states.merge(table, on="Jurisdiction")
    
    return table
In [34]:
meltedCustodyPopulation_11_16 = cleanAndMeltCustodyOccupancyTable(custodyPopulation_11_16, states, "Population")
meltedOccupancyLowRate_11_16 = cleanAndMeltCustodyOccupancyTable(occupancyLowRate_11_16, states, "Occupancy")
meltedOccupancyHighRate_11_16 = cleanAndMeltCustodyOccupancyTable(occupancyHighRate_11_16, states, "Occupancy")
In [35]:
meltedCustodyPopulation_11_16.head()
Out[35]:
Jurisdiction Jurisdiction Abbreviation Year Population
0 Alabama AL 2011 26268.0
1 Alabama AL 2012 26230.0
2 Alabama AL 2013 26271.0
3 Alabama AL 2014 25664.0
4 Alabama AL 2015 24814.0
In [36]:
meltedOccupancyHighRate_11_16.head()
Out[36]:
Jurisdiction Jurisdiction Abbreviation Year Occupancy
0 Alabama AL 2011 1.960000
1 Alabama AL 2012 1.957000
2 Alabama AL 2013 1.972593
3 Alabama AL 2014 1.927016
4 Alabama AL 2015 1.863000
In [37]:
def plotAnnualStateOccupancy(table, low_or_high):
    
    table = table[table.Occupancy != "Federal"]

    cmax = table.Occupancy.max()
    cmin = table.Occupancy.min()
    
    years = table.Year.unique()

    heatMapData = [{"type" : "choropleth",  "locations" : table.loc[table.Year == year, "Jurisdiction Abbreviation"], "locationmode" : "USA-states", "colorscale" : "Viridis", "zmin" : cmin, "zmax" : cmax, "z" : table.Occupancy[table.Year == year].astype(float)} for year in years]
    mapLayout = [{"geo" : {"scope" : 'usa'}, "title" : "Prison Occupancy (" + low_or_high + " Estimation) by State, " + str(year)} for year in years]
    
    for i, data in enumerate(heatMapData):
        usHeatMap = go.Figure(data=[data], layout=mapLayout[i])
        iplot(usHeatMap)
In [38]:
plotAnnualStateOccupancy(meltedOccupancyLowRate_11_16, "Low")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/pandas/core/ops.py:1649: FutureWarning:

elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison

In [39]:
plotAnnualStateOccupancy(meltedOccupancyHighRate_11_16, "High")
In [40]:
def aggregateRegionalOccupancy(table, populationTable, regions):
    
    table = regions.merge(table.dropna(), left_on="State", right_on="Jurisdiction", how="right")
    table = table.merge(populationTable[["Jurisdiction", "Year", "Population"]].dropna(), on=["Jurisdiction", "Year"], how="left")

    temp_index = table["Jurisdiction"] == "Federal"
    table.loc[temp_index, "Region"] = "Federal"
    table.loc[temp_index, "Division"] = "Federal"
    table["Capacity"] = table.Population / table.Occupancy
    
    regionCapacity = table.groupby(["Year", "Region"]).Capacity.sum()
    divisionCapacity = table.groupby(["Year", "Region", "Division"]).Capacity.sum()
    
    regionPopulation = table.groupby(["Year", "Region"]).Population.sum()
    divisionPopulation = table.groupby(["Year", "Region", "Division"]).Population.sum()
    
    regionOccupancy = (regionPopulation / regionCapacity).rename("Occupancy")
    divisionOccupancy = (divisionPopulation / divisionCapacity).rename("Occupancy")
    
    return regionOccupancy, divisionOccupancy
In [41]:
regionLowOccupancy, divisionLowOccupancy = aggregateRegionalOccupancy(meltedOccupancyLowRate_11_16, meltedCustodyPopulation_11_16, regions)
regionHighOccupancy, divisionHighOccupancy = aggregateRegionalOccupancy(meltedOccupancyHighRate_11_16, meltedCustodyPopulation_11_16, regions)
In [42]:
regionLowOccupancy
Out[42]:
Year  Region   
2011  Federal      1.380000
      Midwest      1.077601
      Northeast    1.016331
      South        0.889082
      West         1.210281
2012  Federal      1.373000
      Midwest      1.034894
      Northeast    0.997858
      South        0.877879
      West         1.132735
2013  Federal      1.331037
      Midwest      1.115274
      Northeast    0.999224
      South        0.888064
      West         1.132297
2014  Federal      1.280000
      Midwest      1.109416
      Northeast    0.985143
      South        0.889829
      West         0.921820
2015  Federal      1.197000
      Midwest      1.095396
      Northeast    0.961326
      South        0.899980
      West         0.907239
2016  Federal      1.140430
      Midwest      1.043553
      Northeast    0.918713
      South        0.910598
      West         0.937919
Name: Occupancy, dtype: float64
In [43]:
divisionLowOccupancy
Out[43]:
Year  Region     Division          
2011  Federal    Federal               1.380000
      Midwest    East North Central    1.099671
                 West North Central    1.035526
      Northeast  Mid-Atlantic          1.001750
                 New England           1.115214
      South      East South Central    0.805975
                 South Atlantic        0.917843
                 West South Central    0.881235
      West       Mountain              0.860362
                 Pacific               1.448103
2012  Federal    Federal               1.373000
      Midwest    East North Central    1.031675
                 West North Central    1.043673
      Northeast  Mid-Atlantic          0.984457
                 New England           1.088153
      South      East South Central    0.796217
                 South Atlantic        0.914648
                 West South Central    0.859317
      West       Mountain              0.871882
                 Pacific               1.336828
2013  Federal    Federal               1.331037
      Midwest    East North Central    1.143384
                 West North Central    1.044429
      Northeast  Mid-Atlantic          0.987791
                 New England           1.074341
      South      East South Central    0.791955
                 South Atlantic        0.903849
                 West South Central    0.905175
      West       Mountain              0.860498
                 Pacific               1.318337
2014  Federal    Federal               1.280000
      Midwest    East North Central    1.138908
                 West North Central    1.036418
      Northeast  Mid-Atlantic          0.972415
                 New England           1.068524
      South      East South Central    0.768438
                 South Atlantic        0.909722
                 West South Central    0.911157
      West       Mountain              0.864469
                 Pacific               0.950348
2015  Federal    Federal               1.197000
      Midwest    East North Central    1.122013
                 West North Central    1.030593
      Northeast  Mid-Atlantic          0.953354
                 New England           1.014071
      South      East South Central    0.822564
                 South Atlantic        0.921198
                 West South Central    0.899393
      West       Mountain              0.846741
                 Pacific               0.937674
2016  Federal    Federal               1.140430
      Midwest    East North Central    1.048168
                 West North Central    1.034928
      Northeast  Mid-Atlantic          0.946959
                 New England           0.773357
      South      East South Central    0.818840
                 South Atlantic        0.954584
                 West South Central    0.888155
      West       Mountain              0.930806
                 Pacific               0.942105
Name: Occupancy, dtype: float64
In [44]:
def plotRegionalOccupancy(table, usMap, low_or_high):
    
    regionType = table.index.names[1]

    tableWOFederal = table[table.index.get_level_values(1) != "Federal"]

    years = table.index.get_level_values(0).unique()
    dividedTableWOFederal = [tableWOFederal[year] for year in years]

    usRegionMap = usMap[[regionType, "geometry"]].dissolve(regionType)

    colorlimit = (tableWOFederal.min(), tableWOFederal.max())
    for i, oneYearTable in enumerate(dividedTableWOFederal):
        year = years[i]
        
        oneYearTable = usRegionMap.join(oneYearTable)

        figtitle = "Prison Occupancy (" + low_or_high + " Estimate) by " + regionType + ", " + str(year)
        plotGeoPandasUSMap(oneYearTable, figtitle, "Occupancy", colorlimit)


    figtitle = "Prison Occupancy (" + low_or_high + " Estimate) by " + regionType
    ax = table.unstack().plot(title = figtitle)
    ax.tick_params(axis = "y", length=0)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=len(table.index.get_level_values(1).unique()))
In [45]:
plotRegionalOccupancy(regionLowOccupancy, usMap, "Low")
In [46]:
plotRegionalOccupancy(regionHighOccupancy, usMap, "High")
In [47]:
plotRegionalOccupancy(dropRegionIndex(divisionLowOccupancy), usMap, "Low")
In [48]:
plotRegionalOccupancy(dropRegionIndex(divisionHighOccupancy), usMap, "High")
In [ ]: